{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import itertools\n",
    "import os\n",
    "import sys\n",
    "from os.path import dirname, realpath, join\n",
    "\n",
    "# Third-party\n",
    "import pandas as pd\n",
    "\n",
    "# base_dir is two directory levels above the current working directory\n",
    "# (presumably the project root -- it has to contain config_path and the data\n",
    "# package for the imports below to resolve). It is put first on sys.path.\n",
    "base_dir = dirname(dirname(os.getcwd()))\n",
    "sys.path.insert(0, base_dir)\n",
    "\n",
    "# Project-local\n",
    "from config_path import PROSTATE_DATA_PATH, PLOTS_PATH, GENE_PATH\n",
    "from data.data_access import Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mutations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the processed table used for the mutation section; the first CSV column\n",
    "# becomes the row index.\n",
    "mut_file = join(\n",
    "    PROSTATE_DATA_PATH,\n",
    "    'processed/P1000_final_analysis_set_cross_important_only.csv'\n",
    ")\n",
    "mut_df = pd.read_csv(mut_file, index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1003\n",
       "1       8\n",
       "Name: MAML3, dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# MAML3 column of the mutation table as integers, then a tally of its values.\n",
    "maml3_mut = mut_df.loc[:, 'MAML3'].astype(int)\n",
    "maml3_mut.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7897334649555775"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Percentage of samples in the mutation table with a non-zero MAML3 entry.\n",
    "# Computed from maml3_mut instead of hand-copied counts, so the denominator is\n",
    "# the number of samples actually present in that table. The leading `100.`\n",
    "# keeps this a float division under the Python 2 kernel.\n",
    "100. * (maml3_mut > 0).sum() / len(maml3_mut)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Copy number"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "selected_genes = 'tcga_prostate_expressed_genes_and_cancer_genes_and_memebr_of_reactome.csv'\n",
    "data_params = {'id': 'ALL', 'type': 'prostate_paper',\n",
    "             'params': {\n",
    "                 'data_type': ['cnv'],\n",
    "                 'account_for_data_type' : None,\n",
    "                 'drop_AR': False,\n",
    "                 'cnv_levels': 5,\n",
    "                 'mut_binary': False,\n",
    "                 'balanced_data': False,\n",
    "                 'combine_type': 'union',  # intersection\n",
    "                 'use_coding_genes_only': True,\n",
    "                 'selected_genes': selected_genes,\n",
    "                 'selected_samples': None,\n",
    "                 'training_split': 0,\n",
    "             }\n",
    "             }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:root:some genes dont exist in the original data set\n"
     ]
    }
   ],
   "source": [
    "# Build the project data loader from the options dict. The saved stderr output\n",
    "# shows it logs a warning that some genes are missing from the original data set.\n",
    "data_adapter = Data(**data_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Unpack the full data set. `info` and `col` are used below as the row index\n",
    "# and column labels when `x` is wrapped in a DataFrame.\n",
    "x, y, info, col = data_adapter.get_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>RNF14</th>\n",
       "      <th>OR52B2</th>\n",
       "      <th>AGL</th>\n",
       "      <th>AGK</th>\n",
       "      <th>NCBP1</th>\n",
       "      <th>NCBP2</th>\n",
       "      <th>HSPA4</th>\n",
       "      <th>CELA2A</th>\n",
       "      <th>FTMT</th>\n",
       "      <th>AGA</th>\n",
       "      <th>...</th>\n",
       "      <th>PDCD6IP</th>\n",
       "      <th>BBS7</th>\n",
       "      <th>AP4M1</th>\n",
       "      <th>NPY5R</th>\n",
       "      <th>ACTL6A</th>\n",
       "      <th>GNGT1</th>\n",
       "      <th>AIP</th>\n",
       "      <th>WNT16</th>\n",
       "      <th>SELP</th>\n",
       "      <th>OR4A5</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>...</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "      <th>cnv</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>00-029N9_LN</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-087MM_BONE</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-095N1_LN</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-120A1_LIVER</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>02-083E1_LN</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 3430 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               RNF14 OR52B2  AGL  AGK NCBP1 NCBP2 HSPA4 CELA2A FTMT  AGA  \\\n",
       "                 cnv    cnv  cnv  cnv   cnv   cnv   cnv    cnv  cnv  cnv   \n",
       "00-029N9_LN      0.0    0.0  0.0  0.0   0.0   0.0  -2.0    0.0  0.0  0.0   \n",
       "01-087MM_BONE   -1.0    0.0  1.0  0.0   0.0   1.0   1.0    0.0  1.0 -1.0   \n",
       "01-095N1_LN      0.0    0.0  0.0  0.0   0.0   0.0   0.0    1.0  1.0  0.0   \n",
       "01-120A1_LIVER   0.0    0.0  0.0  0.0   0.0   0.0   1.0    0.0 -2.0  0.0   \n",
       "02-083E1_LN      0.0    0.0  0.0  0.0   0.0   0.0  -1.0    0.0  2.0  0.0   \n",
       "\n",
       "                ...  PDCD6IP BBS7 AP4M1 NPY5R ACTL6A GNGT1  AIP WNT16 SELP  \\\n",
       "                ...      cnv  cnv   cnv   cnv    cnv   cnv  cnv   cnv  cnv   \n",
       "00-029N9_LN     ...      0.0  0.0   0.0   0.0    0.0   0.0  0.0   0.0  0.0   \n",
       "01-087MM_BONE   ...     -1.0 -1.0   0.0   0.0   -1.0   0.0  0.0   0.0  0.0   \n",
       "01-095N1_LN     ...      1.0  1.0   0.0   1.0    1.0   0.0  0.0   0.0  0.0   \n",
       "01-120A1_LIVER  ...      0.0  0.0   1.0   0.0    0.0   0.0  0.0   0.0  0.0   \n",
       "02-083E1_LN     ...      0.0  0.0   0.0   2.0    0.0   0.0  0.0  -1.0  0.0   \n",
       "\n",
       "               OR4A5  \n",
       "                 cnv  \n",
       "00-029N9_LN      0.0  \n",
       "01-087MM_BONE    0.0  \n",
       "01-095N1_LN      0.0  \n",
       "01-120A1_LIVER   0.0  \n",
       "02-083E1_LN      0.0  \n",
       "\n",
       "[5 rows x 3430 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Wrap the feature matrix in a labelled frame: rows indexed by `info`, columns by `col`.\n",
    "x_df = pd.DataFrame(data=x, index=info, columns=col)\n",
    "x_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       " 0.0    785\n",
       "-1.0    104\n",
       " 1.0     89\n",
       " 2.0     25\n",
       "-2.0     10\n",
       "Name: (MAML3, cnv), dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distribution of MAML3 copy-number calls over all samples.\n",
    "x_df[('MAML3', 'cnv')].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train/test split as provided by the data loader.\n",
    "(x_train, x_test,\n",
    " y_train, y_test,\n",
    " info_train, info_test,\n",
    " columns) = data_adapter.get_train_test()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Labelled frame for the training split only.\n",
    "x_train_df = pd.DataFrame(data=x_train, index=info_train, columns=columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       " 0.0    708\n",
       "-1.0     95\n",
       " 1.0     77\n",
       " 2.0     22\n",
       "-2.0      9\n",
       "Name: (MAML3, cnv), dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distribution of MAML3 copy-number calls within the training split.\n",
    "x_train_df[('MAML3', 'cnv')].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.4444444444444446"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ratio of +2 to -2 MAML3 copy-number calls in the training split (the levels\n",
    "# this notebook later labels amp / del). Read from the counts rather than typed\n",
    "# in by hand, so it cannot drift from the data. float() keeps the division\n",
    "# non-integer under the Python 2 kernel.\n",
    "maml3_train_counts = x_train_df[('MAML3', 'cnv')].value_counts()\n",
    "float(maml3_train_counts.loc[2.0]) / maml3_train_counts.loc[-2.0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.5"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ratio of +2 to -2 MAML3 copy-number calls over all samples, read from the\n",
    "# counts rather than typed in by hand. float() keeps the division non-integer\n",
    "# under the Python 2 kernel.\n",
    "maml3_all_counts = x_df[('MAML3', 'cnv')].value_counts()\n",
    "float(maml3_all_counts.loc[2.0]) / maml3_all_counts.loc[-2.0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Correlation with CNV burden"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Patient.ID\n",
       "AAPC-STID0000011640-Tumor-SM-2XU1H    0.010487\n",
       "AAPC-STID0000021561-Tumor-SM-3RVWB    0.135831\n",
       "AAPC-STID0000011949-Tumor-SM-2XU1I    0.190097\n",
       "AAPC-STID0000021610-Tumor-SM-2XU13    0.054238\n",
       "AAPC-STID0000021537-Tumor-SM-3RVW7    0.054551\n",
       "Name: Fraction of genome altered, dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the per-sample CNV burden ('Fraction of genome altered') from the raw spreadsheet.\n",
    "# Dedicated names are used here instead of `base_dir` (set in the first cell) and\n",
    "# `data` (rebound further down to the joined frame), so re-running this cell in any\n",
    "# order neither clobbers nor accidentally reads those variables.\n",
    "raw_data_dir = join(PROSTATE_DATA_PATH, 'raw_data')\n",
    "filename = '41588_2018_78_MOESM5_ESM.xlsx'\n",
    "# skiprows=2 skips the first two sheet rows; index_col=1 makes the second column\n",
    "# the index (it appears as Patient.ID in the saved output).\n",
    "burden_table = pd.read_excel(join(raw_data_dir, filename), skiprows=2, index_col=1)\n",
    "cnv = burden_table['Fraction of genome altered']\n",
    "cnv.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw MAML3 copy-number calls for every sample.\n",
    "maml3_cnv = x_df[('MAML3', 'cnv')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       " 0    785\n",
       "-1    104\n",
       " 1     89\n",
       " 2     25\n",
       "-2     10\n",
       "Name: (MAML3, cnv), dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cast the calls to integers and tally them.\n",
    "maml3_cnv = maml3_cnv.astype(int)\n",
    "maml3_cnv.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binarise MAML3 copy number: 1 for +/-2 calls, 0 for everything else (-1, 0, +1).\n",
    "# The flag is derived from x_df instead of remapping maml3_cnv in place: the former\n",
    "# replace({-1: 0, 1: 0, 2: 1, -2: 1}) was not idempotent -- run a second time on its\n",
    "# own 0/1 output it mapped every 1 back to 0 and wiped out all events.\n",
    "maml3_cnv = (x_df[('MAML3', 'cnv')].astype(int).abs() >= 2).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "35"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of samples whose MAML3 copy-number flag is set (the series is 0/1 here).\n",
    "maml3_cnv.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "00-029N9_LN       1\n",
       "01-087MM_BONE     0\n",
       "01-095N1_LN       0\n",
       "01-120A1_LIVER    1\n",
       "02-083E1_LN       1\n",
       "Name: (MAML3, cnv), dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview of the copy-number flag per sample.\n",
    "maml3_cnv.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tumor_Sample_Barcode\n",
       "00-029N9_LN       0\n",
       "01-087MM_BONE     0\n",
       "01-095N1_LN       0\n",
       "01-120A1_LIVER    0\n",
       "02-083E1_LN       0\n",
       "Name: MAML3, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview of the mutation flag per sample, for comparison with the copy-number flag.\n",
    "maml3_mut.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "35"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-check the copy-number flag total right before it is combined with mutations.\n",
    "maml3_cnv.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A sample has a MAML3 event if its mutation flag or its copy-number flag is set.\n",
    "# The two series do not cover exactly the same samples (their value_counts above\n",
    "# total 1011 and 1013). A plain `+` aligns on the index and yields NaN for any\n",
    "# sample missing from either series, and NaN > 0 is False, so such a sample's\n",
    "# event would be silently dropped. fill_value=0 treats a missing entry as\n",
    "# 'no event from that source' instead.\n",
    "maml3_event = maml3_mut.add(maml3_cnv, fill_value=0) > 0\n",
    "maml3_event_df = maml3_event.to_frame(name='MAML3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inner join on the sample index: keep only samples present in both the\n",
    "# CNV-burden table and the MAML3 event table.\n",
    "cnv_df = cnv.to_frame()\n",
    "data = cnv_df.join(other=maml3_event_df, how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Fraction of genome altered</th>\n",
       "      <th>MAML3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000011640-Tumor-SM-2XU1H</th>\n",
       "      <td>0.010487</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000021561-Tumor-SM-3RVWB</th>\n",
       "      <td>0.135831</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000011949-Tumor-SM-2XU1I</th>\n",
       "      <td>0.190097</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000021610-Tumor-SM-2XU13</th>\n",
       "      <td>0.054238</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000021537-Tumor-SM-3RVW7</th>\n",
       "      <td>0.054551</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    Fraction of genome altered  MAML3\n",
       "AAPC-STID0000011640-Tumor-SM-2XU1H                    0.010487  False\n",
       "AAPC-STID0000021561-Tumor-SM-3RVWB                    0.135831  False\n",
       "AAPC-STID0000011949-Tumor-SM-2XU1I                    0.190097  False\n",
       "AAPC-STID0000021610-Tumor-SM-2XU13                    0.054238  False\n",
       "AAPC-STID0000021537-Tumor-SM-3RVW7                    0.054551  False"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview of the joined burden / event table.\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(989, 2)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the joined table, i.e. how many samples survived the inner join.\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PointbiserialrResult(correlation=-0.005881896435257775, pvalue=0.8534300466028149)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from scipy import stats\n",
    "\n",
    "# Point-biserial correlation between the binary MAML3 event flag and CNV burden.\n",
    "# Dedicated names are used instead of `x` / `y`: those globals hold the arrays\n",
    "# returned by data_adapter.get_data() and are what x_df is built from, so\n",
    "# overwriting them here would break a later re-run of that cell.\n",
    "maml3_flags = data['MAML3'].values\n",
    "burden_values = data['Fraction of genome altered'].values\n",
    "\n",
    "stats.pointbiserialr(maml3_flags, burden_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cnv</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>00-029N9_LN</th>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-087MM_BONE</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-095N1_LN</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-120A1_LIVER</th>\n",
       "      <td>-2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>02-083E1_LN</th>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                cnv\n",
       "00-029N9_LN     2.0\n",
       "01-087MM_BONE   0.0\n",
       "01-095N1_LN     0.0\n",
       "01-120A1_LIVER -2.0\n",
       "02-083E1_LN     2.0"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-column frame of the raw MAML3 copy-number calls. Dropping the outer\n",
    "# ('MAML3') column level leaves a single column named 'cnv'.\n",
    "maml3_cnv_del_amps = x_df[('MAML3', 'cnv')].to_frame()\n",
    "maml3_cnv_del_amps.columns = maml3_cnv_del_amps.columns.droplevel(0)\n",
    "maml3_cnv_del_amps.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inner join on the sample index: CNV burden next to the raw MAML3 copy-number call.\n",
    "data_del_amps = cnv_df.join(other=maml3_cnv_del_amps, how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Fraction of genome altered</th>\n",
       "      <th>cnv</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000011640-Tumor-SM-2XU1H</th>\n",
       "      <td>0.010487</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000021561-Tumor-SM-3RVWB</th>\n",
       "      <td>0.135831</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000011949-Tumor-SM-2XU1I</th>\n",
       "      <td>0.190097</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000021610-Tumor-SM-2XU13</th>\n",
       "      <td>0.054238</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPC-STID0000021537-Tumor-SM-3RVW7</th>\n",
       "      <td>0.054551</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    Fraction of genome altered  cnv\n",
       "AAPC-STID0000011640-Tumor-SM-2XU1H                    0.010487  0.0\n",
       "AAPC-STID0000021561-Tumor-SM-3RVWB                    0.135831  0.0\n",
       "AAPC-STID0000011949-Tumor-SM-2XU1I                    0.190097  0.0\n",
       "AAPC-STID0000021610-Tumor-SM-2XU13                    0.054238  0.0\n",
       "AAPC-STID0000021537-Tumor-SM-3RVW7                    0.054551  1.0"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview of the joined burden / copy-number table.\n",
    "data_del_amps.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(25, 10)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Boolean masks: calls above +1 (amp) and below -1 (del); then the size of each group.\n",
    "ind_amp = data_del_amps['cnv'].gt(1)\n",
    "ind_del = data_del_amps['cnv'].lt(-1)\n",
    "ind_amp.sum(), ind_del.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# CNV burden of the samples in each group.\n",
    "cnv_amp = data_del_amps['Fraction of genome altered'][ind_amp]\n",
    "cnv_del = data_del_amps['Fraction of genome altered'][ind_del]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2.244326728646481, 0.03163677866754105)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from scipy.stats import ttest_ind\n",
    "\n",
    "# Two-sample t-test comparing CNV burden between the amp and del groups.\n",
    "# equal_var=True is scipy's default (pooled-variance Student test); it is spelled\n",
    "# out so the assumption is visible to the reader.\n",
    "t_stat, p = ttest_ind(cnv_amp.values, cnv_del.values, equal_var=True)\n",
    "t_stat, p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:min_env]",
   "language": "python",
   "name": "conda-env-min_env-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
